package com.atguigu.app.dws;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.atguigu.bean.TrafficHomeDetailPageViewBean;
import com.atguigu.utils.DateFormatUtil;
import com.atguigu.utils.MyClickHouseUtil;
import com.atguigu.utils.MyKafkaUtil;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.time.Duration;

// Data flow: web/app -> Nginx -> log server (log) -> Flume -> Kafka (ODS) -> FlinkApp -> Kafka (DWD) -> FlinkApp -> ClickHouse (DWS)
// Programs:  Mock -> Flume (f1.sh) -> Kafka (ZK) -> BaseLogApp -> Kafka (ZK) -> DwsTrafficPageViewWindow -> ClickHouse (ZK)
/**
 * Flink streaming job that counts first-visit-of-the-day visitors for the
 * {@code home} page and the {@code good_detail} page.
 *
 * <p>Pipeline, as built in {@link #main(String[])}:
 * <ol>
 *   <li>read page-log JSON strings from the Kafka topic {@code dwd_traffic_page_log};</li>
 *   <li>keep only complete records whose {@code page.page_id} is {@code home} or
 *       {@code good_detail};</li>
 *   <li>assign event-time timestamps from the {@code ts} field with 2 seconds of
 *       tolerated out-of-orderness;</li>
 *   <li>key by {@code common.mid} and, using keyed state, emit a bean only for the first
 *       record per page type whose date differs from the stored one;</li>
 *   <li>sum the beans in 10-second tumbling event-time windows over the whole stream;</li>
 *   <li>print each window result and write it to ClickHouse table
 *       {@code dws_traffic_page_view_window}.</li>
 * </ol>
 */
public class DwsTrafficPageViewWindow {

    /**
     * Builds the job graph and submits it for execution.
     *
     * @param args command-line arguments; not read by this job
     * @throws Exception if the job cannot be built or fails while running
     */
    public static void main(String[] args) throws Exception {

        //TODO 1. Obtain the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Checkpointing / restart strategy / state backend settings (currently disabled).
        // Re-enabling them also requires importing CheckpointingMode, CheckpointConfig,
        // RestartStrategies and HashMapStateBackend.
//        env.enableCheckpointing(3000L, CheckpointingMode.EXACTLY_ONCE);
//        env.getCheckpointConfig().setCheckpointTimeout(60 * 1000L);
//        env.getCheckpointConfig().enableExternalizedCheckpoints(
//                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION
//        );
//        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(3000L);
//        env.setRestartStrategy(
//                RestartStrategies.failureRateRestart(3, Time.days(1L), Time.minutes(3L))
//        );
//        env.setStateBackend(new HashMapStateBackend());
//        env.getCheckpointConfig().setCheckpointStorage("hdfs://hadoop102:8020/ck");
//        System.setProperty("HADOOP_USER_NAME", "atguigu");

        //TODO 2. Create a stream from the Kafka page-log topic
        String groupId = "page_view_window_211227";
        String pageTopic = "dwd_traffic_page_log";
        DataStreamSource<String> kafkaDS = env.addSource(MyKafkaUtil.getFlinkKafkaConsumer(pageTopic, groupId));

        //TODO 3. Parse into JSON objects and filter
        SingleOutputStreamOperator<JSONObject> jsonObjDS = kafkaDS.flatMap(new FlatMapFunction<String, JSONObject>() {
            @Override
            public void flatMap(String value, Collector<JSONObject> out) throws Exception {
                // An empty payload cannot be a page log; skip it instead of failing the job.
                if (value == null || value.trim().isEmpty()) {
                    return;
                }

                // Syntactically invalid JSON still raises fastjson's exception on purpose:
                // that indicates a broken upstream and should not be hidden.
                JSONObject jsonObject = JSON.parseObject(value);
                if (jsonObject == null) {
                    return;
                }

                JSONObject page = jsonObject.getJSONObject("page");
                JSONObject common = jsonObject.getJSONObject("common");
                if (page == null || common == null) {
                    return;
                }

                String pageId = page.getString("page_id");
                boolean isTargetPage = "good_detail".equals(pageId) || "home".equals(pageId);

                // The watermark assigner needs "ts" and keyBy needs a non-null "mid";
                // validating both here lets every downstream operator rely on them.
                if (isTargetPage
                        && common.getString("mid") != null
                        && jsonObject.getLong("ts") != null) {
                    out.collect(jsonObject);
                }
            }
        });

        //TODO 4. Extract timestamps and generate watermarks
        SingleOutputStreamOperator<JSONObject> jsonObjWithWmDS = jsonObjDS.assignTimestampsAndWatermarks(WatermarkStrategy.<JSONObject>forBoundedOutOfOrderness(Duration.ofSeconds(2)).withTimestampAssigner(new SerializableTimestampAssigner<JSONObject>() {
            @Override
            public long extractTimestamp(JSONObject element, long recordTimestamp) {
                // Non-null is guaranteed by the filter in step 3.
                return element.getLong("ts");
            }
        }));

        //TODO 5. Key by mid, de-duplicate per day, and convert to JavaBean objects
        SingleOutputStreamOperator<TrafficHomeDetailPageViewBean> trafficHomeDetailDS = jsonObjWithWmDS
                .keyBy(json -> json.getJSONObject("common").getString("mid"))
                .flatMap(new RichFlatMapFunction<JSONObject, TrafficHomeDetailPageViewBean>() {

                    // Per-mid date of the last home-page record that was counted.
                    private ValueState<String> homeLastDtState;
                    // Per-mid date of the last goods-detail record that was counted.
                    private ValueState<String> detailLastDtState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        ValueStateDescriptor<String> homeDescriptor = new ValueStateDescriptor<>("home", String.class);
                        ValueStateDescriptor<String> detailDescriptor = new ValueStateDescriptor<>("detail", String.class);

                        // Entries expire one day after they were last written, which keeps
                        // state from growing without bound for visitors that never return.
                        StateTtlConfig stateTtlConfig = new StateTtlConfig.Builder(Time.days(1))
                                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                                .build();

                        homeDescriptor.enableTimeToLive(stateTtlConfig);
                        detailDescriptor.enableTimeToLive(stateTtlConfig);

                        homeLastDtState = getRuntimeContext().getState(homeDescriptor);
                        detailLastDtState = getRuntimeContext().getState(detailDescriptor);
                    }

                    @Override
                    public void flatMap(JSONObject value, Collector<TrafficHomeDetailPageViewBean> out) throws Exception {

                        // Dates stored in state and the date of the current record
                        String homeLastDt = homeLastDtState.value();
                        String detailLastDt = detailLastDtState.value();
                        Long ts = value.getLong("ts");
                        String curDt = DateFormatUtil.toDate(ts);

                        // Page information
                        String pageId = value.getJSONObject("page").getString("page_id");

                        // Visitor counts contributed by this record
                        long homeUvCt = 0L;
                        long goodDetailUvCt = 0L;

                        // NOTE(review): the comparison is "different date", not "later date".
                        // A late record from the previous day that arrives after the first
                        // record of the new day is therefore counted and moves the stored
                        // date back. Confirm whether that is acceptable.
                        if ("home".equals(pageId)) {
                            if (homeLastDt == null || !homeLastDt.equals(curDt)) {
                                homeUvCt = 1L;
                                homeLastDtState.update(curDt);
                            }
                        } else {
                            // Step 3 only lets "home" and "good_detail" through, so this
                            // branch is the goods-detail page.
                            if (detailLastDt == null || !detailLastDt.equals(curDt)) {
                                goodDetailUvCt = 1L;
                                detailLastDtState.update(curDt);
                            }
                        }

                        // Emit only when the record contributed a visitor; repeat visits
                        // on the same date produce no output.
                        if (homeUvCt == 1L || goodDetailUvCt == 1L) {
                            out.collect(new TrafficHomeDetailPageViewBean("", "",
                                    homeUvCt,
                                    goodDetailUvCt,
                                    ts));
                        }
                    }
                });

        //TODO 6. Window and aggregate
        SingleOutputStreamOperator<TrafficHomeDetailPageViewBean> resultDS = trafficHomeDetailDS.windowAll(TumblingEventTimeWindows.of(org.apache.flink.streaming.api.windowing.time.Time.seconds(10))).reduce(new ReduceFunction<TrafficHomeDetailPageViewBean>() {
            @Override
            public TrafficHomeDetailPageViewBean reduce(TrafficHomeDetailPageViewBean value1, TrafficHomeDetailPageViewBean value2) throws Exception {
                // Accumulate both counters into value1 and reuse it as the running total.
                value1.setHomeUvCt(value1.getHomeUvCt() + value2.getHomeUvCt());
                value1.setGoodDetailUvCt(value1.getGoodDetailUvCt() + value2.getGoodDetailUvCt());
                return value1;
            }
        }, new AllWindowFunction<TrafficHomeDetailPageViewBean, TrafficHomeDetailPageViewBean, TimeWindow>() {
            @Override
            public void apply(TimeWindow window, Iterable<TrafficHomeDetailPageViewBean> values, Collector<TrafficHomeDetailPageViewBean> out) throws Exception {

                // With an incremental reduce in front, the iterable carries the single
                // pre-aggregated result for this window.
                TrafficHomeDetailPageViewBean pageViewBean = values.iterator().next();

                // Stamp the result: ts is the wall-clock time of emission,
                // stt/edt are the formatted window start and end.
                pageViewBean.setTs(System.currentTimeMillis());
                pageViewBean.setStt(DateFormatUtil.toYmdHms(window.getStart()));
                pageViewBean.setEdt(DateFormatUtil.toYmdHms(window.getEnd()));

                // Emit the window result
                out.collect(pageViewBean);
            }
        });

        //TODO 7. Write the results to ClickHouse
        resultDS.print(">>>>>>>>>>");
        resultDS.addSink(MyClickHouseUtil.getSink("insert into dws_traffic_page_view_window values(?,?,?,?,?)"));

        //TODO 8. Launch the job
        env.execute("DwsTrafficPageViewWindow");
    }

}
